#coding:utf-8

import numpy as np
import statsmodels.stats.outliers_influence as oi
import statsmodels.formula.api as smf
import pandas as pd
import sys

# Raise the recursion limit to 5000.
# NOTE(review): presumably needed because the regression formulas built in
# get_VIF contain several hundred terms -- confirm before lowering it.
sys.setrecursionlimit(5000)

# Python 2 only: reloading ``sys`` makes ``setdefaultencoding`` callable again
# so implicit str/unicode conversions use UTF-8 instead of ASCII.
# Python 3 has neither the ``reload`` builtin nor ``sys.setdefaultencoding``,
# so the hack is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding( "utf-8" )

def get_path(dir):
    '''
    Normalize a file path to forward slashes and return it as text.

    :param dir:  the path, either a byte string (decoded as UTF-8) or text;
                 it may use backslashes, forward slashes, both or neither
    :return: the path as text with every backslash replaced by a forward slash
    '''
    # Decode byte strings only; text that is already decoded passes through
    # (decoding it a second time would raise).
    if isinstance(dir, bytes):
        dir = dir.decode('utf8')
    # Replace unconditionally so bare file names, paths starting with '/',
    # and mixed-separator paths are all handled.
    return dir.replace('\\', '/')

def get_VIF(csv_dir, column_list):
    '''
    Print and return the variance inflation factor (VIF) of each column.

    Every column in ``column_list`` is regressed (OLS) on all the other listed
    columns, and its VIF is computed as ``1 / (1 - R^2)`` of that regression.

    :param csv_dir:  the file of the data, just support csv file; it is read
                     as GB18030 with the first row as header
    :param column_list:  the columns of the variable
    :return: list of ``(variable, VIF)`` tuples in ``column_list`` order.
             A VIF greater than 10 indicates obvious collinearity; a VIF
             between 5 and 10 indicates some collinearity.
    '''
    dataframe = pd.read_csv(get_path(csv_dir), encoding='gb18030', header=0)

    results = []
    for item in column_list:
        other_item_list = [i for i in column_list if i != item]
        # With no other regressors, fall back to an intercept-only model
        # ("item~1") instead of building the incomplete formula "item~".
        fml = item + '~' + ('+'.join(other_item_list) or '1')
        r2 = smf.ols(formula=fml, data=dataframe).fit().rsquared
        vif = 1 / (1 - r2)
        # Single pre-formatted argument: prints "<item> <vif>" on both
        # Python 2 and Python 3.
        print('%s %s' % (item, vif))
        results.append((item, vif))
    return results

# Location of the CSV file to analyse.
csv_dir = 'C:\\Users\\zhishu\\Desktop\\chi_merge.csv'

# Category ids that appear once as ``cnt_ratio_<id>`` and once as
# ``price_ratio_<id>``, in the order the columns are listed.
_CATEGORY_IDS = (
    '11', '1101', '1201', '120886001', '120950002', '121266001',
    '121536003', '121536007', '122650005', '122684003', '122718004',
    '122852001', '122928002', '122950001', '122952001', '124024001',
    '124044001', '124050001', '124242008', '124354002', '124458005',
    '124466001', '124468001', '124470001', '124484008', '124750013',
    '14', '1512', '16', '1625', '1801', '20', '21', '23', '25', '26',
    '27', '28', '2813', '29', '30', '33', '34', '35', '40',
    '50002766', '50002768', '50004958', '50006842', '50006843',
    '50007216', '50007218', '50008075', '50008090', '50008141',
    '50008163', '50008164', '50008165', '50008907', '50010404',
    '50010728', '50010788', '50011397', '50011665', '50011699',
    '50011740', '50011949', '50011972', '50012029', '50012082',
    '50012100', '50012164', '50013864', '50013886', '50014811',
    '50014812', '50014927', '50016348', '50016349', '50016422',
    '50016891', '50017300', '50018004', '50018222', '50018264',
    '50019095', '50019780', '50020275', '50020332', '50020485',
    '50020579', '50020611', '50020808', '50020857', '50022517',
    '50022703', '50023282', '50023575', '50023717', '50023722',
    '50023724', '50023804', '50023878', '50023904', '50024451',
    '50024612', '50024971', '50025004', '50025110', '50025111',
    '50025705', '50025707', '50026316', '50026523', '50026555',
    '50026800', '50050359', '50050471', '50074001', '50158001',
    '50454031', '50468001', '50510002', '99',
)

# Columns passed to get_VIF, assembled group by group in their original order.
var_list = [
    'gender',
    'age',
    'buy_month',
    'avg_cnt',
    'std_cnt',
    'avg_price',
    'std_price',
    'avg_price_ratio',
    'std_price_ratio',
]
# Generator expressions keep the loop variable out of module scope on Python 2.
var_list.extend('cnt_ratio_' + cat_id for cat_id in _CATEGORY_IDS)
var_list.extend('price_ratio_' + cat_id for cat_id in _CATEGORY_IDS)
var_list.extend([
    'local_buycount',
    'total_price',
    'car_flag',
    'house_flag',
    'child_flag',
    'pet_flag',
    'annoy_num',
    'annoy_ratio',
    'brand_id_num',
    'root_cat_id_num',
    'b_bc_type_num',
    'b_bc_type_num_ratio',
    'b_bc_price_ratio',
    'brand_effec_price_ratio',
    'brand_effec_num_ratio',
    'b50_num_ratio',
    'b50_ratio',
    'b30_num_ratio',
    'b30_ratio',
    'b10_num_ratio',
    'b10_ratio',
    'b5_num_ratio',
    'b5_ratio',
    'active_score',
])
# spark_001 .. spark_500, zero-padded to three digits.
var_list.extend('spark_%03d' % idx for idx in range(1, 501))
# Script entry: compute and print the VIF of every listed column.
get_VIF(csv_dir=csv_dir, column_list=var_list)